"""
使用Beautiful Soup库解析html
"""
import requests
from bs4 import BeautifulSoup

baidu_url = 'https://www.baidu.com'

# Fetch the page. An explicit timeout is passed because requests otherwise
# waits indefinitely for a server that never answers.
response = requests.get(baidu_url, timeout=10)
# Fail fast with requests.HTTPError on a 4xx/5xx reply instead of parsing an
# error page as if it were the real document.
response.raise_for_status()
# Decode the body as UTF-8 rather than with the encoding requests inferred
# from the response headers.
response.encoding = 'utf-8'

# html.parser is the parser shipped with Python's standard library: moderate
# speed and tolerant of malformed markup. Using it still requires the bs4
# package to be installed.
soup = BeautifulSoup(response.text, 'html.parser')
print(soup.title)

# Attribute access such as soup.p returns only the FIRST <p> element, not all
# of them, and returns None when the document contains no such tag.
first_p = soup.p
print(first_p)
if first_p is not None:
    # Nested tag access: the first <a> inside the first <p>.
    print(first_p.a)

    # .children is an iterator over the tag's direct children; printing it
    # shows the iterator object itself, the loop shows its items.
    print(first_p.children)
    for index, child in enumerate(first_p.children):
        print(index, child)

# .parents is an iterator over every ancestor of the tag, nearest first.
# Guarded because soup.a is None when the page has no <a> element.
first_a = soup.a
if first_a is not None:
    print(first_a.parents)
    for index, parent in enumerate(first_a.parents):
        print(index, parent)

# Inspect the first <a> element; soup.a is None when the page has none.
first_a = soup.a
print(first_a)
if first_a is not None:
    # .attrs is a dict holding all attributes of the tag.
    print(first_a.attrs)
    # Two equivalent ways to read a single attribute. .get() is used so that
    # an anchor without a "name" attribute prints None instead of raising
    # KeyError.
    print(first_a.get('name'))
    print(first_a.attrs.get('name'))
    # .string is the tag's text when it has exactly one string child,
    # otherwise None.
    print(first_a.string)

# find_all: the first argument (name) is the TAG name to match, so this
# collects every <a> element in the document.
all_anchors = soup.find_all('a')
print(all_anchors)

# select: look elements up with a CSS selector.
css_selector = '.s_form .s_form_wrapper .fm'
matched_forms = soup.select(css_selector)
print(matched_forms)

# prettify: the whole document re-serialized as an indented string.
pretty_html = soup.prettify()
print(pretty_html)
